/*==============================================================================
IC - UE - Add in female employment

Outline:
	I. 	Bring in data from Regional Yearbooks 
		A. 	1970 (DE, FR, IT, NL)
			1971 (UKN0)
		
	II. 	Bring in data from original sources
		A. Belgium
		B. Denmark
		C. Finland
		D. Spain
		E. Sweden
		
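	III.	Append above sources

	IV.	Reconcile codes that denote the same region at two NUTS levels
		(e.g. DE3 and DE30) and save the final dataset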

==============================================================================*/      

*===============================================================================
*I. 	Bring in data from Regional Yearbooks
*===============================================================================
* Reads the "Processed" sheet of the 1970 yearbook workbook, builds total and
* female employment per region, relabels "&"-joined lists of district codes
* with a single region code, sums rows that end up sharing a code, and stores
* female employment and the female employment share in tempfile `eyb_1970'.
cd "$insheet_files/Eurostat Regional Yearbook"

import excel "EYB_FEMP_1970.xls", first sheet("Processed") cellrange(A8:M125) clear

* rowtotal() is the documented name of the legacy rsum() function; like rsum()
* it treats missing components as 0, so an all-missing row yields 0.
* (The male total is not built: it was never used after this point.)
egen EMP        = rowtotal(EMP_agriculture EMP_services EMP_industry)
egen EMP_female = rowtotal(EMP_female*)

keep nuts EMP EMP_female

* Relabel composite rows (several district codes joined by "&") with one code.
replace nuts = "DE11" if nuts == "DE145&DE112&DE113&DE114&DE11C&DE117&DE118&DE119&DE115&DE11D&DE116&DE11A&DE111&DE144"
replace nuts = "DE12" if nuts == "DE12B&DE122&DE123&DE11B&DE127&DE125&DE126&DE129"
replace nuts = "DE13" if nuts == "DE147&DE132&DE138&DE133&DE131&DE139&DE134&DE136&DE13A&DE121&DE124"
replace nuts = "DE14" if nuts == "DE146&DE148&DE149&DE141&DE143&DE142&DE137&DE135&DE12C&DE12A"
replace nuts = "DE73" if nuts == "DE724&DE73"
replace nuts = "DE71&DE72" if nuts == "DE71&DE721&DE722&DE723&DE725"
replace nuts = "DEB1" if nuts == "DEB1&DEB39&DEB3B&DEB35&DEB3J"
replace nuts = "DEB3" if nuts == "DEB31&DEB32&DEB33&DEB34&DEB36&DEB37&DEB38&DEB3A&DEB3C&DEB3D&DEB3E&DEB3F&DEB3G&DEB3H&DEB3I&DEB3K"
replace nuts = "UKC&UKD1" if nuts == "UKC1&UKC2&UKD1"

* NOTE(review): this writes temp.dta into the yearbook folder and immediately
* reloads it unchanged; it looks like a leftover checkpoint. Kept so the
* script's file side effects are unchanged - confirm nothing else reads
* temp.dta before removing it.
save temp, replace
use temp, clear

* Several source rows map to the same region; they are summed by the collapse below.
replace nuts = "DE91" if nuts == "DE911&DE912&DE91B&DE917&DE91A&DE916" | nuts == "DE911&DE912&DE91B&DE917" | nuts == "DE91A&DE925&DE926&DE918&DE916&DE919&DE915"
replace nuts = "DE92" if nuts == "DE922&DE923&DE927&DE928&DE929" | nuts == "DE925&DE926&DE918&DE919&DE915"
replace nuts = "DE93" if nuts == "DE931&DE93A&DE934&DE935&DE933&DE938&DE914&DE913" | nuts == "DE932&DE939&DE937&DE93B&DE936"
replace nuts = "DE94" if nuts == "DE944&DE94E&DE949&DE94B" | nuts == "DE94C&DE947&DE942&DE94H" | nuts == "DE94A&DE945&DE94G&DE946&DE943&DE94D&DE941&DE948&DE94F"
replace nuts = "DEA2" if nuts == "DEA22&DEA23&DEA24&DEA27&DEA2A&DEA2B&DEA2C" | nuts == "DEA2D&DEA29&DEA26&DEA28"

* One row per region code.
collapse (sum) EMP*, by(nuts)

* Share is a fraction here; it is converted to percent at the very end of the file.
gen EMP_share_female = EMP_female / EMP

keep nuts EMP_female EMP_share_female

* tempfile defines the local macro eyb_1970; reference it as `eyb_1970'
* (quoted, because the temporary path may contain spaces).
tempfile eyb_1970
save "`eyb_1970'"

*===============================================================================
*II. 	Bring in data from original sources
*===============================================================================

*****************************   A. Belgium   **********************************
* Builds Belgian female employment and female share at every level of the code
* hierarchy: the codes as delivered, their 4-character and 3-character
* prefixes, and the country total "BE". Result is stored in `be_EMP_female'.
cd "$insheet_files/Belgium"

import excel "BE_FEMP_1970.xls", first cellrange(A3:D55) clear

drop if nuts == ""

* Total employment = row sum of every EMP-prefixed column in the sheet
* (rowtotal() is the documented name of the legacy rsum(); missing counts as 0).
egen EMP = rowtotal(EMP*)

* tempfile defines local macros be_nuts3 etc.; they are referenced as `name'.
tempfile be_nuts3 be_nuts2 be_nuts1 be_EMP_female
save "`be_nuts3'"

* Aggregate to the 4-character prefix of the region code.
gen nuts2 = substr(nuts, 1, 4)
collapse (sum) EMP_female EMP, by(nuts2)
rename nuts2 nuts
save "`be_nuts2'"

* Aggregate further to the 3-character prefix.
gen nuts1 = substr(nuts, 1, 3)
collapse (sum) EMP_female EMP, by(nuts1)
rename nuts1 nuts
save "`be_nuts1'"

* Country total.
collapse (sum) EMP_female EMP
gen nuts = "BE"

* Stack all levels into one dataset.
append using "`be_nuts1'"
append using "`be_nuts2'"
append using "`be_nuts3'"

* Compute the share before rescaling the level, so the share is unaffected.
gen EMP_share_female = EMP_female / EMP
keep nuts EMP_female EMP_share_female

*report in thousands
replace EMP_female = EMP_female / 1000

save "`be_EMP_female'"

*****************************   B. Denmark   **********************************
* Builds Danish female employment and female share at every level of the code
* hierarchy: the codes as delivered, their 4-character and 3-character
* prefixes, and the country total "DK". Result is stored in `dk_EMP_female'.
cd "$insheet_files/Denmark"

import excel using "DK_FEMP_1970.xls", first clear

drop if nuts == ""

* One row per delivered region code.
collapse (sum) EMP_1970 EMP_female, by(nuts)

* tempfile defines local macros dk_nuts3 etc.; they are referenced as `name'.
tempfile dk_nuts3 dk_nuts2 dk_nuts1 dk_EMP_female
save "`dk_nuts3'"

* Aggregate to the 4-character prefix of the region code.
gen nuts2 = substr(nuts, 1, 4)
collapse (sum) EMP_1970 EMP_female, by(nuts2)
rename nuts2 nuts
save "`dk_nuts2'"

* Aggregate further to the 3-character prefix.
gen nuts1 = substr(nuts, 1, 3)
collapse (sum) EMP_1970 EMP_female, by(nuts1)
rename nuts1 nuts
save "`dk_nuts1'"

* Country total.
collapse (sum) EMP_1970 EMP_female
gen nuts = "DK"

* Stack all levels into one dataset.
append using "`dk_nuts1'"
append using "`dk_nuts2'"
append using "`dk_nuts3'"

* Compute the share before rescaling the level, so the share is unaffected.
* (The former self-rename of EMP_female and the repeated keep were no-ops
* and have been dropped.)
gen EMP_share_female = EMP_female / EMP_1970
keep nuts EMP_female EMP_share_female

*report in thousands
replace EMP_female = EMP_female / 1000

save "`dk_EMP_female'"

****************************   C. Finland   **********************************
* Builds Finnish female employment and female share per region code as
* delivered in the workbook (no prefix aggregation is done in this section).
* Result is stored in `fi_EMP_female'.
cd "$insheet_files/Finland"
import excel "FI_FEMP_1970.xls", first clear

* Totals are row sums over the male*/female* columns of the sheet
* (rowtotal() is the documented name of the legacy rsum(); missing counts as 0).
egen EMP        = rowtotal(male* female*)
egen EMP_female = rowtotal(female*)

* One row per region code.
collapse (sum) EMP*, by(nuts)

* Compute the share before rescaling the level, so the share is unaffected.
gen EMP_share_female = EMP_female / EMP
keep nuts EMP_female EMP_share_female

*report in thousands
replace EMP_female = EMP_female / 1000

* tempfile defines the local macro fi_EMP_female; reference it as `fi_EMP_female'.
tempfile fi_EMP_female
save "`fi_EMP_female'"


*****************************   D. Spain   ************************************
* Builds Spanish female employment and female share at every level of the code
* hierarchy: the codes as delivered, their 4-character and 3-character
* prefixes, and the country total "ES". Result is stored in `es_EMP_female'.
cd "$insheet_files/Spain"

import excel "ES_FEMP_1970.xls", first clear
drop if nuts == ""

* The sheet provides total and male employment; female is the difference.
gen EMP_female = EMP - EMP_male

* One row per delivered region code.
collapse (sum) EMP*, by(nuts)

* tempfile defines local macros es_nuts3 etc.; they are referenced as `name'.
tempfile es_nuts3 es_nuts2 es_nuts1 es_EMP_female
save "`es_nuts3'"

* Aggregate to the 4-character prefix of the region code.
gen nuts2 = substr(nuts, 1, 4)
collapse (sum) EMP*, by(nuts2)
rename nuts2 nuts
save "`es_nuts2'"

* Aggregate further to the 3-character prefix.
gen nuts1 = substr(nuts, 1, 3)
collapse (sum) EMP*, by(nuts1)
rename nuts1 nuts
save "`es_nuts1'"

* Country total.
collapse (sum) EMP*
gen nuts = "ES"

* Stack all levels into one dataset.
append using "`es_nuts1'"
append using "`es_nuts2'"
append using "`es_nuts3'"

* Compute the share before rescaling the level, so the share is unaffected.
gen EMP_share_female = EMP_female / EMP
keep nuts EMP_female EMP_share_female

*report in thousands
replace EMP_female = EMP_female / 1000

save "`es_EMP_female'"

*************************   E) Sweden   ************************************
* Builds Swedish female employment and female share at every level of the code
* hierarchy: the codes as delivered, their 4-character and 3-character
* prefixes, and the country total "SE". Result is stored in `se_EMP_female'.
cd "$insheet_files/Sweden"

import excel "SE_FEMP_1970.xls", first clear

* Copy the sheet's columns into the EMP* names used by the other sections.
gen EMP_male   = MAN
gen EMP_female = KVINNOR
gen EMP        = SAMTLIGA

* One row per delivered region code.
collapse (sum) EMP*, by(nuts)

* tempfile defines local macros se_nuts3 etc.; they are referenced as `name'.
tempfile se_nuts3 se_nuts2 se_nuts1 se_EMP_female
save "`se_nuts3'"

* Aggregate to the 4-character prefix of the region code.
gen nuts2 = substr(nuts, 1, 4)
collapse (sum) EMP*, by(nuts2)
rename nuts2 nuts
save "`se_nuts2'"

* Aggregate further to the 3-character prefix.
gen nuts1 = substr(nuts, 1, 3)
collapse (sum) EMP*, by(nuts1)
rename nuts1 nuts
save "`se_nuts1'"

* Country total.
collapse (sum) EMP*
gen nuts = "SE"

* Stack all levels into one dataset.
append using "`se_nuts1'"
append using "`se_nuts2'"
append using "`se_nuts3'"

* Compute the share before rescaling the level, so the share is unaffected.
gen EMP_share_female = EMP_female / EMP
keep nuts EMP_female EMP_share_female

*report in thousands
replace EMP_female = EMP_female / 1000

save "`se_EMP_female'"


*******************************************************************************		
*III.	Append all sources
* Stacks the yearbook data and the five country datasets built above.
* tempfile macros are referenced as `name' (quoted, since temporary paths may
* contain spaces).
clear

use "`eyb_1970'"

append using "`be_EMP_female'"
append using "`dk_EMP_female'"
append using "`fi_EMP_female'"
append using "`es_EMP_female'"
append using "`se_EMP_female'"

*-------------------------------------------------------------------------------
* IV. Merge data for codes that denote the same region at two levels
*     (3-character nuts1 code and 4-character nuts2 code, e.g. DE3 and DE30)
*-------------------------------------------------------------------------------
* Zeros are artefacts of the earlier sum-based steps (a sum over missing values
* is 0), not real observations. They must be set to missing BEFORE averaging:
* otherwise a real value averaged with an artefact zero is silently halved.
foreach v of varlist EMP* {
	replace `v' = . if `v' == 0
}

tempfile temp
save "`temp'"

* Relabel each 3-character code with its 4-character twin (code + "0") so the
* two rows can be combined.
foreach code in DE3 DE4 DE5 DE6 DE8 DEC DEE EL3 ES3 ES7 FR1 FR3 UKN {
	replace nuts = "`code'0" if nuts == "`code'"
}

* Combine twin rows; the mean ignores missing values, so a region reported
* under only one of the two codes keeps its value.
collapse (mean) EMP*, by(nuts)

* Bring back the original rows so the 3-character codes are present again,
* then reduce to one row per code.
append using "`temp'"

collapse (mean) EMP*, by(nuts)

* Express the share in percent.
replace EMP_share_female = EMP_share_female*100

save "$dta_files/IC_EU_FEMP.dta", replace



